package com.chance.cc.crawler.development.bootstrap.bilibili;

import com.alibaba.fastjson.JSON;
import com.chance.cc.crawler.core.CrawlerEnum;
import com.chance.cc.crawler.core.downloader.HttpConfig;
import com.chance.cc.crawler.core.downloader.proxy.Proxy;
import com.chance.cc.crawler.core.filter.FilterUtils;
import com.chance.cc.crawler.core.record.CrawlerRecord;
import com.chance.cc.crawler.core.record.CrawlerRequestRecord;
import com.chance.cc.crawler.development.controller.DevCrawlerController;
import org.apache.commons.lang3.StringUtils;

import static com.chance.cc.crawler.core.CrawlerEnum.CrawlerRequestType.*;

/**
 * Development entry point that starts a local bilibili crawl through
 * {@link DevCrawlerController}.
 *
 * <p>Two scenarios are available: a single hard-coded video page
 * ({@link #articleRecord()}, the default) and a keyword-driven crawl
 * ({@link #keywordsRecord()}), selected by passing {@code keywords} as the
 * first program argument.
 *
 * @author lt
 * @version 1.0
 * @date 2021-02-01 18:52:54
 * @email okprog@sina.com
 */
public class BilibiliStart {

    private static final String DOMAIN = "bilibili";
    // Alternative site key; not referenced by any scenario at the moment.
    private static final String NIKE_SITE = "nike_keyword";
    private static final String MEDICAL_SITE = "medical_keyword";

    private static final String SITE_BIZ = "video-realtime";

    /** First program argument that selects the keyword-driven scenario. */
    private static final String KEYWORDS_MODE = "keywords";

    /** Custom biz-tag key under which the serialized comment filter record is stored. */
    private static final String COMMENT_FILTER_TAG_KEY = "comment_record_filter_info";

    /** Proxy shared by every request record built in this class; configured once below. */
    private static final Proxy PROXY = new Proxy();
    static {
        // Proxy configuration.
        // NOTE(review): credentials are hard-coded in source control; consider moving
        // them to external configuration and rotating them.
        PROXY.setHost("http-dyn.abuyun.com");
        PROXY.setPort(9020);
        PROXY.setUsername("HL89Q19E86E2987D");
        PROXY.setPassword("71F33D94CE5F7BF2");
    }

    /**
     * Runs one of the two crawl scenarios.
     *
     * @param args optional; when the first element equals {@code "keywords"} the
     *             keyword-driven crawl is started, otherwise (including no arguments)
     *             the single-video crawl is started
     */
    public static void main(String[] args) {
        boolean keywordMode = args != null && args.length > 0 && KEYWORDS_MODE.equals(args[0]);
        if (keywordMode) {
            keywordsRecord();
        } else {
            articleRecord();
        }
    }

    /**
     * Starts a crawl that is driven by a keyword support record.
     *
     * <p>A support record pointing at the keyword meta service is registered next to
     * the main request record for the bilibili home page. The main record has
     * downloading, parsing, washing and the pipeline switched off. Results go to the
     * console and to a local JSON file, and ten crawler threads are used.
     */
    private static void keywordsRecord(){
        // Value passed to the date-range filters (presumably hours; confirm against FilterUtils).
        final int dateRange = 24;

        // Keyword source record.
        // NOTE(review): the URL contains a double slash before "v1"; left untouched
        // because the service may depend on it. Confirm and normalise if not.
        CrawlerRequestRecord keywordCrawlerRecord = CrawlerRequestRecord.builder()
                .startPageRequest(DOMAIN + "_keywords",turnPageItem)
                .httpUrl("http://192.168.1.215:9599//v1/meta/"+DOMAIN+"/keys?site="+MEDICAL_SITE)
                .requestLabelTag(supportSource)
                .requestLabelTag(internalDownload)
                .build();

        CrawlerRequestRecord requestRecord = CrawlerRequestRecord.builder()
                .startPageRequest(DOMAIN, CrawlerEnum.CrawlerRequestType.turnPage)
                .recordKey("https://www.bilibili.com/")
                .httpUrl("https://www.bilibili.com/")
                .releaseTime(System.currentTimeMillis())
                .httpConfig(HttpConfig.me(DOMAIN))
                .domain(DOMAIN)
                .filter(CrawlerEnum.CrawlerRecordFilter.keyOrDateRange)
                .addFilterInfo(FilterUtils.memoryFilterKeyInfo(DOMAIN))
                .addFilterInfo(FilterUtils.dateRangeFilterInfo(dateRange,null))
                .proxy(PROXY)
                .needParsed(false)
                .needWashed(false)
                .build();
        requestRecord.setDownload(false);
        requestRecord.setSkipPipeline(true);

        requestRecord.tagsCreator().bizTags().addDomain(DOMAIN);
        requestRecord.tagsCreator().bizTags().addSiteBiz(SITE_BIZ);

        // Attach the comment de-duplication filter information.
        attachCommentFilterInfo(requestRecord, CrawlerEnum.CrawlerRecordFilter.keyOrDateRange, dateRange);

        DevCrawlerController devCrawlerController = DevCrawlerController.builder()
                .triggerInfo(DOMAIN, DOMAIN + "_trigger", System.currentTimeMillis(), DOMAIN + "_job")
                .crawlerRequestQueue(DevCrawlerController.devRequestQueue(DOMAIN)) // in-memory queue
                .consoleResultPipeline() // console output
                .fileResultPipeline("D:\\chance\\data\\bilibili\\bilibili_"+MEDICAL_SITE+".json",false)
                .requestRecord(requestRecord)
                .supportRecord(keywordCrawlerRecord)
                .crawlerThreadNum(10)
                .build();

        // Enable comment collection by labelling the job's schedule category tag.
        devCrawlerController.getCrawlerJob().getScheduleTags().getCategoryTag().addLabelTag(CrawlerEnum.CrawlerDataType.comment.enumVal());
        devCrawlerController.start();
    }

    /**
     * Starts a crawl of one hard-coded video page.
     *
     * <p>The request record is tagged with the article, interaction and comment
     * result labels and uses a date-range filter. Results go to the console only.
     */
    private static void articleRecord(){
        // Value passed to the date-range filters (presumably hours, i.e. 30 days; confirm against FilterUtils).
        final int dateRange = 24 * 30;

        String url = "https://www.bilibili.com/video/BV1a54y177o8";

        CrawlerRequestRecord requestRecord = CrawlerRequestRecord.builder()
                .startPageRequest(DOMAIN, turnPageItem)
                .httpUrl(url)
                .releaseTime(System.currentTimeMillis())
                .httpConfig(HttpConfig.me(DOMAIN))
                .domain(DOMAIN)
                .filter(CrawlerEnum.CrawlerRecordFilter.dateRange)
                .addFilterInfo(FilterUtils.memoryFilterKeyInfo(DOMAIN))
                .addFilterInfo(FilterUtils.dateRangeFilterInfo(dateRange,null))
                .proxy(PROXY)
                .resultLabelTag(CrawlerEnum.CrawlerDataType.article)
                .resultLabelTag(CrawlerEnum.CrawlerDataType.interaction)
                .resultLabelTag(CrawlerEnum.CrawlerDataType.comment)
                .build();

        requestRecord.tagsCreator().bizTags().addDomain(DOMAIN);
        requestRecord.tagsCreator().bizTags().addSiteBiz(SITE_BIZ);

        // Attach the comment de-duplication filter information.
        attachCommentFilterInfo(requestRecord, CrawlerEnum.CrawlerRecordFilter.dateRange, dateRange);

        DevCrawlerController devCrawlerController = DevCrawlerController.builder()
                .triggerInfo(DOMAIN, DOMAIN + "_trigger", System.currentTimeMillis(), DOMAIN + "_job")
                .crawlerRequestQueue(DevCrawlerController.devRequestQueue(DOMAIN)) // in-memory queue
                .consoleResultPipeline() // console output
                .requestRecord(requestRecord)
                .build();

        devCrawlerController.start();
    }

    /**
     * Builds a filter-only {@link CrawlerRecord} for comments and stores its JSON form
     * on the request record as a custom biz tag.
     *
     * <p>The filter record carries an in-memory key filter whose key is the
     * {@code filter} request type, the domain and {@code "comment"} joined with
     * {@code "-"}, plus a date-range filter.
     *
     * @param requestRecord record that receives the {@code comment_record_filter_info} tag
     * @param filterType    filter strategy set on the comment filter record
     * @param dateRange     value forwarded to {@code FilterUtils.dateRangeFilterInfo}
     */
    private static void attachCommentFilterInfo(CrawlerRequestRecord requestRecord,
                                                CrawlerEnum.CrawlerRecordFilter filterType,
                                                int dateRange){
        CrawlerRecord filterCrawlerRecord = new CrawlerRecord();
        filterCrawlerRecord.setFilter(filterType);
        filterCrawlerRecord.addFilterInfo(FilterUtils.memoryFilterKeyInfo(StringUtils.joinWith("-",filter,DOMAIN,"comment")));
        filterCrawlerRecord.addFilterInfo(FilterUtils.dateRangeFilterInfo(dateRange,null));
        requestRecord.tagsCreator().bizTags().addCustomKV(COMMENT_FILTER_TAG_KEY, JSON.toJSONString(filterCrawlerRecord));
    }
}
